Code
datatable(Anames)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
datatable(Anames)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
# Build a wide table of "Allison" counts: the columns from Year through Count
# are kept, Gender is renamed to Sex, and each year becomes its own column.
temp <- Anames |>
  filter(Name == 'Allison') |>
  select(Year:Count) |>
  rename(Sex = Gender) |>
  pivot_wider(names_from = Year,
              values_from = Count) |>
  # Year cells with no matching record come out of the pivot as NA; report
  # them as a count of 0. The argument is spelled `.cols` in full instead of
  # relying on partial matching of an abbreviated name.
  mutate(across(.cols = `1997`:`2014`,
                .fns = ~replace_na(.x, 0)))
# Render the wide Allison table as a styled HTML table.
temp |>
  kable(format = "html",
        caption = "==>Occurrence of the name Allison by state and year<==") |>
  kable_classic(html_font = "FiraCode Nerd Font")
| Sex | State | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| F | AK | 19 | 10 | 13 | 14 | 9 | 14 | 14 | 13 | 10 | 18 | 13 | 15 | 13 | 12 | 8 | 11 | 16 | 10 |
| F | AL | 121 | 148 | 124 | 91 | 82 | 83 | 88 | 68 | 86 | 68 | 78 | 83 | 82 | 71 | 66 | 67 | 66 | 63 |
| F | AR | 78 | 64 | 67 | 72 | 82 | 73 | 61 | 65 | 71 | 79 | 55 | 64 | 73 | 60 | 61 | 50 | 67 | 56 |
| F | AZ | 98 | 103 | 96 | 97 | 98 | 83 | 80 | 94 | 91 | 109 | 91 | 133 | 147 | 126 | 93 | 110 | 108 | 123 |
| F | CA | 565 | 638 | 571 | 538 | 539 | 554 | 545 | 591 | 580 | 581 | 654 | 844 | 973 | 854 | 845 | 834 | 803 | 904 |
| F | CO | 100 | 100 | 106 | 92 | 72 | 69 | 95 | 73 | 87 | 70 | 89 | 95 | 119 | 92 | 96 | 69 | 80 | 90 |
| F | CT | 82 | 102 | 89 | 81 | 63 | 71 | 60 | 72 | 54 | 54 | 55 | 51 | 54 | 43 | 44 | 41 | 48 | 35 |
| F | DC | 15 | 15 | 8 | 8 | 13 | 12 | 19 | 13 | 18 | 16 | 19 | 29 | 41 | 15 | 19 | 22 | 22 | 17 |
| F | DE | 19 | 12 | 25 | 12 | 19 | 18 | 14 | 18 | 14 | 14 | 10 | 16 | 23 | 18 | 15 | 17 | 14 | 16 |
| F | FL | 253 | 308 | 224 | 241 | 220 | 244 | 235 | 213 | 228 | 225 | 239 | 265 | 290 | 248 | 246 | 270 | 244 | 262 |
| F | GA | 196 | 215 | 173 | 209 | 176 | 147 | 166 | 169 | 157 | 164 | 159 | 200 | 226 | 197 | 154 | 185 | 199 | 165 |
| F | HI | 6 | 15 | 12 | 12 | 11 | 6 | 9 | 17 | 6 | 10 | 9 | 15 | 12 | 14 | 6 | 12 | 0 | 11 |
| F | IA | 130 | 127 | 123 | 109 | 104 | 80 | 98 | 101 | 82 | 91 | 78 | 65 | 56 | 49 | 49 | 50 | 45 | 40 |
| F | ID | 19 | 29 | 29 | 16 | 20 | 30 | 22 | 32 | 33 | 24 | 21 | 24 | 32 | 29 | 27 | 23 | 21 | 20 |
| F | IL | 364 | 452 | 362 | 295 | 320 | 334 | 262 | 307 | 275 | 263 | 248 | 275 | 284 | 246 | 202 | 203 | 220 | 198 |
| F | IN | 239 | 238 | 241 | 198 | 198 | 200 | 182 | 180 | 183 | 156 | 146 | 161 | 128 | 135 | 132 | 136 | 115 | 99 |
| F | KS | 75 | 109 | 77 | 81 | 85 | 105 | 72 | 80 | 75 | 66 | 60 | 64 | 66 | 67 | 63 | 57 | 35 | 46 |
| F | KY | 130 | 156 | 137 | 134 | 129 | 126 | 123 | 85 | 104 | 112 | 108 | 104 | 82 | 98 | 82 | 73 | 56 | 66 |
| M | KY | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| F | LA | 109 | 98 | 113 | 64 | 76 | 65 | 74 | 65 | 73 | 49 | 48 | 64 | 53 | 51 | 50 | 48 | 50 | 59 |
| F | MA | 190 | 201 | 174 | 142 | 179 | 148 | 147 | 146 | 129 | 113 | 106 | 104 | 85 | 72 | 74 | 86 | 66 | 56 |
| F | MD | 145 | 168 | 153 | 121 | 131 | 129 | 119 | 110 | 90 | 116 | 134 | 116 | 141 | 131 | 97 | 96 | 115 | 117 |
| F | ME | 37 | 33 | 30 | 15 | 22 | 20 | 20 | 16 | 14 | 19 | 18 | 16 | 19 | 19 | 13 | 11 | 10 | 8 |
| F | MI | 338 | 339 | 297 | 291 | 246 | 269 | 261 | 253 | 225 | 215 | 208 | 197 | 168 | 166 | 160 | 124 | 120 | 137 |
| F | MN | 191 | 191 | 186 | 175 | 154 | 146 | 142 | 147 | 109 | 120 | 129 | 128 | 113 | 109 | 109 | 71 | 77 | 77 |
| F | MO | 211 | 256 | 228 | 199 | 171 | 153 | 191 | 155 | 171 | 169 | 115 | 158 | 134 | 140 | 118 | 104 | 122 | 87 |
| F | MS | 61 | 51 | 56 | 43 | 54 | 56 | 41 | 49 | 46 | 47 | 41 | 41 | 38 | 33 | 45 | 36 | 39 | 40 |
| F | MT | 15 | 22 | 15 | 16 | 8 | 11 | 14 | 7 | 11 | 12 | 10 | 17 | 17 | 8 | 15 | 13 | 8 | 7 |
| F | NC | 216 | 220 | 199 | 187 | 177 | 206 | 166 | 175 | 156 | 164 | 150 | 219 | 238 | 226 | 188 | 183 | 189 | 176 |
| F | ND | 27 | 30 | 20 | 22 | 18 | 20 | 20 | 11 | 14 | 17 | 10 | 18 | 10 | 14 | 7 | 10 | 8 | 9 |
| F | NE | 62 | 68 | 56 | 56 | 44 | 63 | 50 | 47 | 50 | 45 | 35 | 35 | 30 | 40 | 35 | 33 | 27 | 31 |
| F | NH | 32 | 38 | 32 | 19 | 32 | 34 | 30 | 21 | 28 | 37 | 14 | 11 | 18 | 15 | 20 | 12 | 9 | 10 |
| F | NJ | 216 | 223 | 225 | 177 | 189 | 174 | 158 | 177 | 166 | 162 | 143 | 171 | 187 | 160 | 115 | 131 | 140 | 138 |
| F | NM | 27 | 21 | 17 | 15 | 21 | 24 | 13 | 14 | 15 | 15 | 14 | 43 | 26 | 24 | 23 | 28 | 26 | 33 |
| F | NV | 22 | 33 | 40 | 29 | 27 | 38 | 32 | 24 | 24 | 40 | 43 | 50 | 67 | 50 | 43 | 52 | 53 | 62 |
| F | NY | 412 | 408 | 335 | 315 | 322 | 335 | 311 | 288 | 278 | 281 | 284 | 299 | 351 | 336 | 307 | 271 | 291 | 323 |
| F | OH | 496 | 477 | 431 | 398 | 376 | 372 | 343 | 307 | 335 | 311 | 255 | 263 | 240 | 198 | 185 | 154 | 175 | 171 |
| F | OK | 90 | 107 | 98 | 87 | 81 | 81 | 75 | 93 | 74 | 69 | 78 | 67 | 69 | 80 | 76 | 69 | 72 | 55 |
| F | OR | 67 | 84 | 86 | 55 | 67 | 81 | 72 | 66 | 71 | 55 | 64 | 62 | 61 | 52 | 56 | 61 | 58 | 68 |
| F | PA | 343 | 402 | 324 | 281 | 283 | 296 | 261 | 247 | 242 | 214 | 216 | 203 | 206 | 148 | 161 | 176 | 159 | 145 |
| F | RI | 26 | 25 | 24 | 21 | 27 | 26 | 19 | 14 | 14 | 14 | 12 | 15 | 18 | 16 | 7 | 6 | 11 | 11 |
| F | SC | 78 | 87 | 72 | 58 | 66 | 60 | 53 | 58 | 74 | 71 | 62 | 79 | 65 | 76 | 53 | 79 | 81 | 56 |
| F | SD | 31 | 33 | 22 | 27 | 17 | 33 | 22 | 26 | 23 | 21 | 16 | 14 | 11 | 19 | 16 | 17 | 12 | 16 |
| F | TN | 201 | 203 | 157 | 158 | 139 | 129 | 122 | 149 | 124 | 123 | 130 | 127 | 141 | 96 | 129 | 131 | 113 | 116 |
| F | TX | 482 | 497 | 484 | 447 | 457 | 440 | 402 | 465 | 407 | 435 | 469 | 656 | 842 | 714 | 657 | 724 | 817 | 797 |
| F | UT | 72 | 83 | 76 | 73 | 61 | 74 | 55 | 64 | 60 | 51 | 55 | 69 | 66 | 56 | 52 | 55 | 61 | 42 |
| F | VA | 210 | 227 | 210 | 176 | 173 | 152 | 164 | 187 | 154 | 173 | 172 | 221 | 196 | 190 | 163 | 152 | 148 | 152 |
| F | VT | 13 | 15 | 12 | 15 | 9 | 11 | 7 | 11 | 7 | 6 | 10 | 0 | 0 | 0 | 6 | 0 | 8 | 5 |
| F | WA | 121 | 122 | 117 | 116 | 125 | 111 | 102 | 95 | 91 | 110 | 113 | 109 | 120 | 94 | 101 | 136 | 73 | 100 |
| F | WI | 163 | 183 | 193 | 150 | 159 | 161 | 154 | 135 | 148 | 125 | 117 | 112 | 100 | 102 | 121 | 83 | 79 | 82 |
| F | WV | 56 | 67 | 58 | 56 | 52 | 62 | 59 | 50 | 44 | 33 | 40 | 38 | 38 | 35 | 38 | 21 | 38 | 28 |
| F | WY | 5 | 8 | 6 | 10 | 6 | 8 | 6 | 8 | 10 | 8 | 7 | 12 | 10 | 12 | 5 | 8 | 8 | 5 |
# Female-only Allison counts: keep the rows where Sex is 'F', then keep the
# columns from State through 2014.
allison_f <- select(
  filter(temp, Sex == 'F'),
  State:`2014`
)
# Scatter plot of the yearly totals of the female Allison counts.
allison_f |>
  # Collapse the per-state rows into a single total for each year column.
  # `.cols` is written out in full rather than partially matched.
  summarise(across(.cols = `1997`:`2014`,
                   .fns = ~sum(.x))) |>
  # Back to long form; the year labels land in the default `name` column.
  pivot_longer(cols = `1997`:`2014`,
               values_to = 'Count') |>
  ggplot(mapping = aes(x = `name`,
                       y = `Count`)) +
  geom_point() +
  labs(x = 'Year',
       y = '',
       title = 'Number of AFAB kids named Allison in US by year')

# Simple linear regression of the yearly total on the year.
allison_linear <- allison_f |>
  summarise(across(.cols = `1997`:`2014`,
                   .fns = ~sum(.x))) |>
  pivot_longer(cols = `1997`:`2014`,
               values_to = 'Count') |>
  # The year labels come from column names, so convert them to numbers
  # before using them as the predictor.
  mutate(`name` = as.numeric(`name`)) |>
  lm(`Count` ~ `name`, data = _)
# Residuals-versus-fitted scatter plot for the linear fit.
broom::augment(allison_linear) |>
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point()
regression equation: 209690 - 102x = y
Residuals line:
Because the plot shows long stretches of negative residuals and the data form a time series, the independence assumption may be violated.
I believe that the name is going out of favor because, at the 5% significance level, there was enough evidence to conclude that there is a negative linear association between year and the number of kids named Allison. Additionally, 58% of the variance in the number of kids named Allison is explained by time, which is a good amount.
# The three spellings to compare.
names <- c('Allan', 'Alan', 'Allen')
# Total count per spelling and year among the male records, drawn as a
# scatter plot of Count against Year.
Anames |>
  filter(Name %in% names, Gender == 'M') |>
  group_by(Name, Year) |>
  summarise(Count = sum(Count)) |>
  ggplot(aes(x = Year, y = Count)) +
  geom_point()
`summarise()` has grouped output by 'Name'. You can override using the
`.groups` argument.
# Year-2000 counts of the three spellings among the male records for CA and
# PA, reshaped so that each spelling becomes its own column.
CAvsPA <- Anames |>
  filter(Name %in% names) |>
  filter(Gender == 'M') |>
  filter(Year == '2000') |>
  filter(State %in% c('CA', 'PA')) |>
  select(!c(Gender, Year)) |>
  pivot_wider(names_from = Name, values_from = Count)
# table
# Show the CA/PA counts for the three spellings as a styled HTML table.
CAvsPA |>
  kable(format = "html",
        caption = "==>Occurrence of the 3 Alan names in PA and CA<==") |>
  kable_classic(html_font = "FiraCode Nerd Font")
| State | Alan | Allen | Allan |
|---|---|---|---|
| CA | 579 | 176 | 131 |
| PA | 51 | 56 | 12 |
# Pick each state's row by its State value rather than by position, so the
# result does not depend on the row order produced by the pivot.
CA <- CAvsPA |>
  filter(State == 'CA')
PA <- CAvsPA |>
  filter(State == 'PA')
# Convert the values of `x` into shares of their total. The first
# element/column (the label) is set aside, excluded from the total by
# zeroing it, and put back unchanged before returning.
percents <- function(x) {
  label <- x[1]
  x[1] <- 0
  x <- x / sum(x)
  x[1] <- label
  x
}
# percents
# Within-state share of each spelling for CA.
percs <- percents(CA)
#rbind usage found: https://www.statology.org/r-append-to-data-frame/
# Stack the CA and PA share rows into one table for display. The values are
# proportions of each state's total, and the caption describes the three Alan
# spellings shown here.
rbind(percs, percents(PA)) |>
  kable(format = "html",
        caption = "==>Occurrence of the 3 Alan names in PA and CA as a proportion<==") |>
  kable_classic(html_font = "FiraCode Nerd Font")
| State | Alan | Allen | Allan |
|---|---|---|---|
| CA | 0.6534989 | 0.1986456 | 0.1478555 |
| PA | 0.4285714 | 0.4705882 | 0.1008403 |